This notebook is a part of a series that documents the review analysis for the DRN Cell Types project.
This notebook contains the code used to perform unsupervised clustering on the 5-HT neurons in the scRNA-seq dataset from Zeisel et al. (2018). Clusters were then filtered and identified using similar methods used in the analysis of our DRN inDrop scRNA-seq dataset.
library(devtools)
library(useful)
Loading required package: ggplot2
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggplot2)
library(reticulate)
library(Seurat)
Loading required package: cowplot
Attaching package: ‘cowplot’
The following object is masked from ‘package:ggplot2’:
ggsave
Loading required package: Matrix
library(stringr)
library(Matrix)
library(parallel)
library(ape)
wd <- "/Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/"
dRaphe.neurons <- readRDS(file.path(wd, "DRN_inDrop_neurons.rds"))
zeisel.ob <- readRDS(file.path(wd, "zeisel_Seurat_cholinergic_monoaminergic_peptidergic.rds"))
dim(dRaphe.neurons@data)
[1] 15941 2041
dim(zeisel.ob@data)
[1] 13825 3977
table(dRaphe.neurons@ident)
Peptidergic GABA-I GABA-II GABA-III GABA/Glu Glu-I Glu-II Glu-III
103 82 132 118 32 111 79 129
Glu-IV Glu-V DA-I DA-II DA-III 5-HT-I 5-HT-II 5-HT-III
78 29 96 118 230 178 186 125
5-HT-IV 5-HT-V
208 7
table(zeisel.ob@ident)
DECHO1 DEINH4 DEINH5 DEINH6 DEINH7 DEINH8 HBADR HBCHO3 HBCHO4 HBNOR HBSER1 HBSER2
114 57 308 12 182 120 40 72 52 23 105 109
HBSER3 HBSER4 HBSER5 HYPEP1 HYPEP2 HYPEP3 HYPEP4 HYPEP5 HYPEP6 HYPEP7 HYPEP8 MBDOP1
87 72 64 191 43 152 98 173 32 13 15 57
MBDOP2 MEGLU14 MEINH14 SCINH11 TECHO TEINH2 TEINH3
63 47 21 50 121 1339 145
huang.geneNames <- rownames(dRaphe.neurons@data)
zeisel.geneNames <- rownames(zeisel.ob@data)
length(huang.geneNames)
[1] 15941
length(zeisel.geneNames)
[1] 13825
length(intersect(huang.geneNames, zeisel.geneNames))
[1] 12043
zeisel.geneNames.fix <- str_replace(string = zeisel.geneNames,
pattern = "-",
replacement = ".")
length(intersect(huang.geneNames, zeisel.geneNames.fix))
[1] 12074
is.element("mt.Co2", huang.geneNames) & is.element("mt.Co2", zeisel.geneNames.fix)
[1] TRUE
head(setdiff(huang.geneNames, zeisel.geneNames.fix), 10)
[1] "X.343C11.2" "X0610007P14Rik" "X0610009B22Rik" "X0610009E02Rik" "X0610009L18Rik"
[6] "X0610009O20Rik" "X0610010F05Rik" "X0610012G03Rik" "X0610030E20Rik" "X0610031O16Rik"
head(setdiff(zeisel.geneNames.fix, huang.geneNames), 10)
[1] "Klf14" "5031425F14Rik" "Esm1" "Gm13530" "Pmch" "Tmprss6"
[7] "Acbd7" "Gnrh1" "A930003A15Rik" "Six6"
Fixing them now probably matters more for trying to merge and cluster the cells from both datasets together.
We will find the intersection after separating out the 5-HT neurons from each dataset.
zeisel.ob.SER <- SubsetData(object = zeisel.ob,
ident.use = c("HBSER1",
"HBSER2",
"HBSER3",
"HBSER4",
"HBSER5"),
subset.raw = TRUE)
dRaphe.neurons.5HT <- SubsetData(object = dRaphe.neurons,
ident.use = c("5-HT-I",
"5-HT-II",
"5-HT-III",
"5-HT-IV",
"5-HT-V"),
subset.raw = TRUE)
dim(dRaphe.neurons.5HT@data)
[1] 15941 704
dim(zeisel.ob.SER@data)
[1] 13825 437
summary(dRaphe.neurons.5HT@meta.data$nUMI)
Min. 1st Qu. Median Mean 3rd Qu. Max.
772 2270 4398 6068 9368 17906
summary(zeisel.ob.SER@meta.data$nUMI)
Min. 1st Qu. Median Mean 3rd Qu. Max.
324 7111 9256 8775 10866 20485
summary(dRaphe.neurons.5HT@meta.data$nGene)
Min. 1st Qu. Median Mean 3rd Qu. Max.
341 1130 1904 2140 3048 5110
summary(zeisel.ob.SER@meta.data$nGene)
Min. 1st Qu. Median Mean 3rd Qu. Max.
290 3093 3657 3407 3993 5434
dRaphe.neurons.5HT <- SubsetData(object = dRaphe.neurons,
ident.use = c("5-HT-I",
"5-HT-II",
"5-HT-III",
"5-HT-IV",
"5-HT-V"),
subset.raw = TRUE)
dim(dRaphe.neurons.5HT@data)
[1] 15941 704
zeisel.ob.SER <- NormalizeData(object = zeisel.ob.SER,
normalization.method = "LogNormalize",
scale.factor = 10000,
display.progress = FALSE)
dRaphe.neurons.5HT <- NormalizeData(object = dRaphe.neurons.5HT,
normalization.method = "LogNormalize",
scale.factor = 10000,
display.progress = FALSE)
huang.5HT.genes <- rownames(dRaphe.neurons.5HT@raw.data[(rowSums(as.matrix(dRaphe.neurons.5HT@raw.data))>0),])
zeisel.5HT.genes <- rownames(zeisel.ob.SER@raw.data[(rowSums(as.matrix(zeisel.ob.SER@raw.data))>0),])
genes.common <- intersect(huang.5HT.genes, zeisel.5HT.genes)
length(genes.common)
[1] 11834
is.element("Tph2", genes.common)
[1] TRUE
is.element("En1", genes.common)
[1] TRUE
is.element("Fev", genes.common)
[1] TRUE
is.element("Pdyn", genes.common)
[1] TRUE
is.element("Trh", genes.common)
[1] TRUE
is.element("Cbln2", genes.common)
[1] TRUE
is.element("Met", genes.common)
[1] TRUE
zeisel.cluster.SER.avg <- AverageExpression(object = zeisel.ob.SER,
genes.use = genes.common,
return.seurat = TRUE,
show.progress = FALSE)
dim(zeisel.cluster.SER.avg@data)
[1] 11834 5
huang.5HT.subtype.avg <- AverageExpression(object = dRaphe.neurons.5HT,
genes.use = genes.common,
return.seurat = TRUE,
show.progress = FALSE)
dim(huang.5HT.subtype.avg@data)
[1] 11834 5
dRaphe.neurons <- SetIdent(object = dRaphe.neurons,
cells.use = WhichCells(object = dRaphe.neurons,
ident = c("5-HT-I",
"5-HT-II",
"5-HT-III",
"5-HT-IV",
"5-HT-V")),
ident.use = "5-HT")
dRaphe.neurons <- SetIdent(object = dRaphe.neurons,
cells.use = WhichCells(object = dRaphe.neurons,
ident = c("DA-I",
"DA-II",
"DA-III")),
ident.use = "DA")
dRaphe.neurons <- SetIdent(object = dRaphe.neurons,
cells.use = WhichCells(object = dRaphe.neurons,
ident = c("GABA-I",
"GABA-II",
"GABA-III")),
ident.use = "GABA")
dRaphe.neurons <- SetIdent(object = dRaphe.neurons,
cells.use = WhichCells(object = dRaphe.neurons,
ident = c("Glu-I",
"Glu-II",
"Glu-III",
"Glu-IV",
"Glu-V")),
ident.use = "Glu")
dRaphe.neurons.avg <- AverageExpression(object = dRaphe.neurons,
genes.use = genes.common,
return.seurat = TRUE,
show.progress = FALSE)
dim(dRaphe.neurons.avg@data)
[1] 11834 6
Check correlation coefficients:
cor(x = as.vector(dRaphe.neurons.avg@data[,"5-HT"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER1"]),
method = "pearson")
[1] 0.8072475
cor(x = as.vector(dRaphe.neurons.avg@data[,"5-HT"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER2"]),
method = "pearson")
[1] 0.8363493
cor(x = as.vector(dRaphe.neurons.avg@data[,"5-HT"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER3"]),
method = "pearson")
[1] 0.7996928
cor(x = as.vector(dRaphe.neurons.avg@data[,"5-HT"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER4"]),
method = "pearson")
[1] 0.8160603
cor(x = as.vector(dRaphe.neurons.avg@data[,"5-HT"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER5"]),
method = "pearson")
[1] 0.8154013
cor(x = as.vector(zeisel.cluster.SER.avg@data[,"HBSER1"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER2"]),
method = "pearson")
[1] 0.9488745
cor(x = as.vector(zeisel.cluster.SER.avg@data[,"HBSER1"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER3"]),
method = "pearson")
[1] 0.9738681
cor(x = as.vector(zeisel.cluster.SER.avg@data[,"HBSER1"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER4"]),
method = "pearson")
[1] 0.9209452
cor(x = as.vector(zeisel.cluster.SER.avg@data[,"HBSER1"]),
y = as.vector(zeisel.cluster.SER.avg@data[,"HBSER5"]),
method = "pearson")
[1] 0.9342197
Coefficients are all fairly high, probably because there are a lot of genes in common between all of the different 5-HT neuron clusters. Clustering the Zeisel dataset cells will probably be more informative.
zeisel.ob.SER <- ScaleData(object = zeisel.ob.SER,
vars.to.regress = c("nUMI", "percent.mito"),
model.use = "linear",
do.scale = TRUE,
scale.max = 10,
do.center = TRUE,
do.par = TRUE,
num.cores = 4,
display.progress = FALSE)
zeisel.ob.SER <- FindVariableGenes(object = zeisel.ob.SER,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.075,
x.high.cutoff = 4,
y.cutoff = 0.5,
num.bin = 100)
Calculating gene means
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variance to mean ratios
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
length(zeisel.ob.SER@var.genes)
[1] 1862
zeisel.ob.SER <- RunPCA(object = zeisel.ob.SER,
pcs.compute = 30,
pcs.print = NA,
weight.by.var = FALSE)
DimHeatmap(object = zeisel.ob.SER,
reduction.type = "pca",
cells.use = length(zeisel.ob.SER@cell.names),
dim.use = 1:9,
do.balanced = TRUE)
DimHeatmap(object = zeisel.ob.SER,
reduction.type = "pca",
cells.use = length(zeisel.ob.SER@cell.names),
dim.use = 10:18,
do.balanced = TRUE)
PCElbowPlot(object= zeisel.ob.SER, num.pc = 30)
zeisel.ob.SER <- JackStraw(object = zeisel.ob.SER,
num.pc = 30,
do.par = TRUE,
num.cores = 4,
display.progress = FALSE)
JackStrawPlot(object = zeisel.ob.SER,
PCs = 1:30)
zeisel.ob.SER <- RunUMAP(object = zeisel.ob.SER,
reduction.use = "pca",
dims.use = 1:10,
n_neighbors = 30L,
min_dist = 0.5,
metric = "correlation")
DimPlot(object = zeisel.ob.SER,
reduction.use = "umap",
no.legend = TRUE,
do.label = TRUE,
pt.size = 3,
label.size = 10)
VlnPlot(object = zeisel.ob.SER,
features.plot = c("Slc6a4",
"Tph2",
"Fev",
"Htr1a",
"Slc22a3",
"Maob"),
nCol = 1,
point.size.use = 0,
size.x.use = 5,
size.y.use = 5,
x.lab.rot = TRUE)
VlnPlot(object = zeisel.ob.SER,
features.plot = c("En1",
"En2",
"Pou3f1",
"Met",
"Plxnb1",
"Pnoc",
"Htr2a"),
nCol = 1,
point.size.use = 0,
size.x.use = 5,
size.y.use = 5,
x.lab.rot = TRUE)
The R2-enriched gene Hoxa2 was not detected.
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("nUMI", "Snap25", "En1",
"Gabre", "Sema5a", "Kit",
"Hrh3", "Slc17a8", "Arhgap36"),
cols.use = c("gray", "red"))
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Slc6a4", "Tph2", "Fev",
"En1", "En2", "Pnoc",
"Slc17a8", "Pdyn", "Prkcq"),
cols.use = c("gray", "red"))
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Trh", "Asb4", "Cbln2",
"Gad1", "Gad2", "Tac1",
"Nr2f2", "Cdh4", "Oxtr"),
cols.use = c("gray", "red"))
resolution.vector <- c(0.5, 0.6, 0.8, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8)
nClusters <- rep(0, length(resolution.vector))
oobe.max <- rep(0, length(resolution.vector))
oobe.min <- rep(0, length(resolution.vector))
oobe.mean <- rep(0, length(resolution.vector))
for (i in 1:length(resolution.vector)) {
zeisel.ob.SER <- FindClusters(object = zeisel.ob.SER,
reduction.type = "pca",
dims.use = 1:10,
resolution = resolution.vector[i],
algorithm = 3,
n.iter = 10,
save.SNN = TRUE,
force.recalc = TRUE,
print.output = FALSE)
# Average expression and build cluster tree
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
oldIDs <- hc$labels[hc$order]
newIDs <- c(1:length(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- plyr::mapvalues(x = zeisel.ob.SER@ident,
from = oldIDs,
to = newIDs)
level.order <- order(as.numeric(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- factor(x = zeisel.ob.SER@ident,
levels = levels(zeisel.ob.SER@ident)[level.order],
ordered = TRUE)
# Rebuild and store the cluster tree on the new identities
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
zeisel.ob.SER@cluster.tree[[1]] <- as.phylo(hc)
nodes.oobe <- AssessNodes(object = zeisel.ob.SER,
genes.training = zeisel.ob.SER@var.genes)
nClusters[i] <- length(levels(zeisel.ob.SER@ident))
oobe.max[i] <- max(as.vector(nodes.oobe$oobe))
oobe.min[i] <- min(as.vector(nodes.oobe$oobe))
oobe.mean[i] <- mean(as.vector(nodes.oobe$oobe))
}
| | 0 % ~calculating
|+++++++++++++++++ | 33% ~03s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 04s
| | 0 % ~calculating
|+++++++++++++ | 25% ~04s
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 04s
| | 0 % ~calculating
|++++++++++ | 20% ~05s
|++++++++++++++++++++ | 40% ~04s
|++++++++++++++++++++++++++++++ | 60% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 06s
| | 0 % ~calculating
|+++++++++ | 17% ~07s
|+++++++++++++++++ | 33% ~05s
|+++++++++++++++++++++++++ | 50% ~03s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 06s
| | 0 % ~calculating
|+++++++++ | 17% ~06s
|+++++++++++++++++ | 33% ~04s
|+++++++++++++++++++++++++ | 50% ~03s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 06s
| | 0 % ~calculating
|+++++++++ | 17% ~06s
|+++++++++++++++++ | 33% ~04s
|+++++++++++++++++++++++++ | 50% ~03s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 06s
| | 0 % ~calculating
|++++++++ | 14% ~08s
|+++++++++++++++ | 29% ~06s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++++++++ | 57% ~03s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 07s
| | 0 % ~calculating
|++++++++ | 14% ~08s
|+++++++++++++++ | 29% ~06s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++++++++ | 57% ~03s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 07s
| | 0 % ~calculating
|++++++++ | 14% ~08s
|+++++++++++++++ | 29% ~06s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++++++++ | 57% ~03s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 07s
| | 0 % ~calculating
|+++++++ | 12% ~09s
|+++++++++++++ | 25% ~07s
|+++++++++++++++++++ | 38% ~05s
|+++++++++++++++++++++++++ | 50% ~04s
|++++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
| | 0 % ~calculating
|+++++++ | 12% ~09s
|+++++++++++++ | 25% ~07s
|+++++++++++++++++++ | 38% ~05s
|+++++++++++++++++++++++++ | 50% ~04s
|++++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
| | 0 % ~calculating
|+++++++ | 12% ~09s
|+++++++++++++ | 25% ~07s
|+++++++++++++++++++ | 38% ~05s
|+++++++++++++++++++++++++ | 50% ~04s
|++++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
clusteringResults <- data.frame(resolution.vector,
nClusters,
oobe.min,
oobe.max,
oobe.mean)
plot.nClusters <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = nClusters)) + geom_line() + geom_point()
plot.oobe.min <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = oobe.min)) + geom_line() + geom_point()
plot.oobe.max <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = oobe.max)) + geom_line() + geom_point()
plot.oobe.mean <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = oobe.mean)) + geom_line() + geom_point()
plot.list <- list(plot.nClusters,
plot.oobe.min,
plot.oobe.max,
plot.oobe.mean)
plot_grid(plotlist = plot.list, ncol = 2)
zeisel.ob.SER <- FindClusters(object = zeisel.ob.SER,
reduction.type = "pca",
dims.use = 1:10,
resolution = 1.5,
algorithm = 3,
n.iter = 10,
save.SNN = TRUE,
force.recalc = TRUE,
print.output = FALSE)
# Average expression and build cluster tree
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
oldIDs <- hc$labels[hc$order]
newIDs <- c(1:length(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- plyr::mapvalues(x = zeisel.ob.SER@ident,
from = oldIDs,
to = newIDs)
level.order <- order(as.numeric(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- factor(x = zeisel.ob.SER@ident,
levels = levels(zeisel.ob.SER@ident)[level.order],
ordered = TRUE)
# Rebuild and store the cluster tree on the new identities
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
zeisel.ob.SER@cluster.tree[[1]] <- as.phylo(hc)
PlotClusterTree(zeisel.ob.SER)
p1 <- DimPlot(object = zeisel.ob.SER,
reduction.use = "umap",
no.legend = TRUE,
do.label = TRUE,
pt.size = 3,
label.size = 10,
do.return = TRUE)
p2 <- DimPlot(object = zeisel.ob.SER,
reduction.use = "umap",
no.legend = TRUE,
do.label = TRUE,
pt.size = 3,
label.size = 10,
group.by = "zeisel.clusterNames",
do.return = TRUE)
plot_grid(p1, p2)
nodes.oobe <- AssessNodes(zeisel.ob.SER, genes.training = zeisel.ob.SER@var.genes)
| | 0 % ~calculating
|++++++++ | 14% ~08s
|+++++++++++++++ | 29% ~06s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++++++++ | 57% ~03s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 07s
nodes.oobe <- nodes.oobe[order(nodes.oobe$oobe, decreasing = TRUE),]
nodes.oobe
nodes.oobe
VlnPlot(object = zeisel.ob.SER,
features.plot = c("nUMI",
"Slc6a4",
"Tph2",
"Fev",
"Htr1a",
"Slc22a3",
"Maob"),
nCol = 1,
point.size.use = 0,
size.x.use = 5,
size.y.use = 5,
x.lab.rot = TRUE)
VlnPlot(object = zeisel.ob.SER,
features.plot = c("En1",
"En2",
"Pou3f1",
"Met",
"Plxnb1",
"Pnoc",
"Htr2a"),
nCol = 1,
point.size.use = 0,
size.x.use = 5,
size.y.use = 5,
x.lab.rot = TRUE)
table(zeisel.ob.SER@ident)
1 2 3 4 5 6 7 8
38 67 50 20 25 58 103 76
markers.cluster <- FindAllMarkers(object = zeisel.ob.SER,
test.use = "MAST",
only.pos = TRUE)
..............................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
..............................................................
Done!
................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
................................................
Done!
.........................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
.........................................
Done!
............................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
............................................................
Done!
..................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
..................................................
Done!
..............................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
..............................
Done!
................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
................................
Done!
................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
................
Done!
markers.node <- FindAllMarkersNode(object = zeisel.ob.SER,
test.use = "MAST",
only.pos = FALSE)
............................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
............................................
Done!
....................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
....................................
Done!
................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
................................................
Done!
...........................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
...........................................
Done!
......................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
......................
Done!
..........................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
..........................
Done!
....................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
....................
Done!
head(markers.cluster[markers.cluster$cluster==1,], 100)
Cluster 1 is highly enriched in ribosomal genes, and the separation seems likely to be an artifact. Should also check DE genes at node #10.
head(markers.cluster[markers.cluster$cluster==2,], 50)
Cluster 2 is likely a part of R1DR (dorsal DRN).
head(markers.cluster[markers.cluster$cluster==3,], 50)
Cluster 3 is likely a part of R1DR (ventral DRN).
head(markers.cluster[markers.cluster$cluster==4,], 50)
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Gpr101", "Trh", "Sst",
"Penk", "Trhr", "Reln",
"Ndnf", "Mc4r", "Pou3f2"),
cols.use = c("gray", "red"))
Cluster 4 gene expression matches the R6P cluster in Okaty et al. (2015).
head(markers.cluster[markers.cluster$cluster==5,], 50)
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Mal", "Cryab", "Fa2h",
"Kcnj10", "Enpp2", "Olig1",
"Mog", "Kcna1", "Nkx6-2"),
cols.use = c("gray", "red"))
Cells in cluster 5 look a lot like putative doublets, given the high expression of genes typically found in glial cells (particularly oligodendrocyte genes).
head(markers.cluster[markers.cluster$cluster==6,], 50)
Cluster 6 is likely to be R1MR (or R1DR/R2).
head(markers.cluster[markers.cluster$cluster==7,], 50)
Cluster 7 cells are likely to be R5 cells.
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Pgr15l", "Tac1", "Shox2",
"Chodl", "Nkx6-1", "Meis2",
"Islr2", "Arhgap36", "Trh"),
cols.use = c("gray", "red"))
head(markers.cluster[markers.cluster$cluster==8,], 50)
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Rorb", "Pcdh19", "Nr2f2",
"Tcf4", "Foxp1", "Esr2",
"Kit", "Sema5a", "Cntn3"),
cols.use = c("gray", "red"))
Cluster 8 is either R1MR or R2, likely R2 (MRN cells, either way).
Check expression of R3-enriched genes:
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Egr2", "Samd9l", "Plxnb1",
"Pdlim5", "Pcdh18", "Zfp560",
"Trpc5", "Sstr1", "Cdh9"),
cols.use = c("gray", "red"))
All cells have the same value of Egr2.All cells have the same value of Trpc5.
It’s possible that there aren’t enough R3 cells in this dataset to detect.
Check DRN 5-HT subtypes:
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("En1", "Adcy2", "Pdyn",
"Slc17a8", "Cbln2", "Arhgap36",
"Prkcq", "Gad1", "Trh"),
cols.use = c("gray", "red"))
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Hcrtr1", "Fbxo2", "Sorcs1",
"Crhr2", "Rprm", "Asb4",
"En2", "Trpc3", "Met"),
cols.use = c("gray", "red"))
head(markers.node[markers.node$cluster==9,], 50)
head(markers.node[markers.node$cluster==10,], 50)
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Gabre", "Sema5a", "Gabrq",
"Tac1", "Pou2f2", "Hrh3",
"En1", "Amigo2", "Kit"),
cols.use = c("gray", "red"))
Cluster 1 seems to be a collection of cells from different clusters (mixed expression of genes in R1/R2/R3/R5/R6 cells) that share “high expression”" on ribosomal genes.
head(markers.node[markers.node$cluster==14,], 50)
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Slc17a8", "Slit2", "Pcdh11x",
"Nos1", "Npy2r", "Crhr2",
"Pcdh15", "Maf", "Ntng1"),
cols.use = c("gray", "red"))
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Hs3st4", "Asb4", "Prph",
"Arhgap36", "Gad1", "Tmem176b",
"Sdk2", "Hcrtr1", "Esm1"),
cols.use = c("gray", "red"))
Node 14 is probably a split between dorsal and ventral DRN.
head(markers.node[markers.node$cluster==11,], 50)
head(markers.node[markers.node$cluster==12,], 50)
head(markers.node[markers.node$cluster==13,], 50)
head(markers.node[markers.node$cluster==15,], 50)
zeisel.ob.SER <- SubsetData(object = zeisel.ob.SER,
ident.remove = c(1, 5),
subset.raw = TRUE)
resolution.vector <- c(0.5, 0.6, 0.8, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8)
nClusters <- rep(0, length(resolution.vector))
oobe.max <- rep(0, length(resolution.vector))
oobe.min <- rep(0, length(resolution.vector))
oobe.mean <- rep(0, length(resolution.vector))
for (i in 1:length(resolution.vector)) {
zeisel.ob.SER <- FindClusters(object = zeisel.ob.SER,
reduction.type = "pca",
dims.use = 1:10,
resolution = resolution.vector[i],
algorithm = 3,
n.iter = 10,
save.SNN = TRUE,
force.recalc = TRUE,
print.output = FALSE)
# Average expression and build cluster tree
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
oldIDs <- hc$labels[hc$order]
newIDs <- c(1:length(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- plyr::mapvalues(x = zeisel.ob.SER@ident,
from = oldIDs,
to = newIDs)
level.order <- order(as.numeric(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- factor(x = zeisel.ob.SER@ident,
levels = levels(zeisel.ob.SER@ident)[level.order],
ordered = TRUE)
# Rebuild and store the cluster tree on the new identities
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
zeisel.ob.SER@cluster.tree[[1]] <- as.phylo(hc)
nodes.oobe <- AssessNodes(object = zeisel.ob.SER,
genes.training = zeisel.ob.SER@var.genes)
nClusters[i] <- length(levels(zeisel.ob.SER@ident))
oobe.max[i] <- max(as.vector(nodes.oobe$oobe))
oobe.min[i] <- min(as.vector(nodes.oobe$oobe))
oobe.mean[i] <- mean(as.vector(nodes.oobe$oobe))
}
| | 0 % ~calculating
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 03s
| | 0 % ~calculating
|+++++++++++++++++ | 33% ~04s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 05s
| | 0 % ~calculating
|+++++++++++++ | 25% ~06s
|+++++++++++++++++++++++++ | 50% ~04s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 07s
| | 0 % ~calculating
|++++++++++ | 20% ~07s
|++++++++++++++++++++ | 40% ~05s
|++++++++++++++++++++++++++++++ | 60% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
| | 0 % ~calculating
|++++++++++ | 20% ~07s
|++++++++++++++++++++ | 40% ~05s
|++++++++++++++++++++++++++++++ | 60% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
| | 0 % ~calculating
|++++++++++ | 20% ~07s
|++++++++++++++++++++ | 40% ~05s
|++++++++++++++++++++++++++++++ | 60% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
| | 0 % ~calculating
|++++++++++ | 20% ~07s
|++++++++++++++++++++ | 40% ~05s
|++++++++++++++++++++++++++++++ | 60% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
| | 0 % ~calculating
|++++++++++ | 20% ~07s
|++++++++++++++++++++ | 40% ~05s
|++++++++++++++++++++++++++++++ | 60% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
| | 0 % ~calculating
|+++++++++ | 17% ~09s
|+++++++++++++++++ | 33% ~07s
|+++++++++++++++++++++++++ | 50% ~05s
|++++++++++++++++++++++++++++++++++ | 67% ~03s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 09s
| | 0 % ~calculating
|+++++++++ | 17% ~09s
|+++++++++++++++++ | 33% ~06s
|+++++++++++++++++++++++++ | 50% ~05s
|++++++++++++++++++++++++++++++++++ | 67% ~03s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 09s
| | 0 % ~calculating
|+++++++++ | 17% ~09s
|+++++++++++++++++ | 33% ~06s
|+++++++++++++++++++++++++ | 50% ~05s
|++++++++++++++++++++++++++++++++++ | 67% ~03s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 10s
| | 0 % ~calculating
|++++++++ | 14% ~11s
|+++++++++++++++ | 29% ~08s
|++++++++++++++++++++++ | 43% ~06s
|+++++++++++++++++++++++++++++ | 57% ~05s
|++++++++++++++++++++++++++++++++++++ | 71% ~03s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 11s
clusteringResults <- data.frame(resolution.vector,
nClusters,
oobe.min,
oobe.max,
oobe.mean)
plot.nClusters <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = nClusters)) + geom_line() + geom_point()
plot.oobe.min <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = oobe.min)) + geom_line() + geom_point()
plot.oobe.max <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = oobe.max)) + geom_line() + geom_point()
plot.oobe.mean <- ggplot(data = clusteringResults,
aes(x = resolution.vector,
y = oobe.mean)) + geom_line() + geom_point()
plot.list <- list(plot.nClusters,
plot.oobe.min,
plot.oobe.max,
plot.oobe.mean)
plot_grid(plotlist = plot.list, ncol = 2)
zeisel.ob.SER <- FindClusters(object = zeisel.ob.SER,
reduction.type = "pca",
dims.use = 1:10,
resolution = 1.1,
algorithm = 3,
n.iter = 10,
save.SNN = TRUE,
force.recalc = TRUE,
print.output = FALSE)
# Average expression and build cluster tree
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
oldIDs <- hc$labels[hc$order]
newIDs <- c(1:length(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- plyr::mapvalues(x = zeisel.ob.SER@ident,
from = oldIDs,
to = newIDs)
level.order <- order(as.numeric(levels(zeisel.ob.SER@ident)))
zeisel.ob.SER@ident <- factor(x = zeisel.ob.SER@ident,
levels = levels(zeisel.ob.SER@ident)[level.order],
ordered = TRUE)
# Rebuild and store the cluster tree on the new identities
zeisel.ob.SER.averages <- AverageExpression(object = zeisel.ob.SER,
return.seurat = TRUE,
show.progress = FALSE)
d <- dist(x = t(zeisel.ob.SER.averages@scale.data),
method = "euclidean")
hc <- hclust(d, method = "ward.D2")
zeisel.ob.SER@cluster.tree[[1]] <- as.phylo(hc)
PlotClusterTree(zeisel.ob.SER)
zeisel.ob.SER <- RunUMAP(object = zeisel.ob.SER,
reduction.use = "pca",
dims.use = 1:10,
n_neighbors = 30L,
min_dist = 0.5,
metric = "correlation")
p1 <- DimPlot(object = zeisel.ob.SER,
reduction.use = "umap",
no.legend = TRUE,
do.label = TRUE,
pt.size = 3,
label.size = 10,
do.return = TRUE)
p2 <- DimPlot(object = zeisel.ob.SER,
reduction.use = "umap",
no.legend = TRUE,
do.label = TRUE,
pt.size = 3,
label.size = 10,
group.by = "zeisel.clusterNames",
do.return = TRUE)
plot_grid(p1, p2)
p1
# QC, 5-HT markers
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("nUMI", "percent.mito", "Ddc",
"Slc18a2", "Tph2", "Slc6a4",
"Maob", "Slc22a3", "Qdpr"),
cols.use = c("gray", "red"))
# Zeisel et al. (2018), HBSER4 vs. HBSER5
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Lat2", "Gpr101", "Penk",
"Trh", "Sst", "Calcr",
"Shox2", "Chodl", "Prkcq"),
cols.use = c("gray", "red"))
# Zeisel et al. (2018), HBSER1 vs. HBSER3
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Satb2", "Nbl1", "Satb2",
"Lrrk1", "Wfdc12", "Kit",
"Kcns1", "Col25a1", "Il1r1"),
cols.use = c("gray", "red"))
# Zeisel et al. (2018), HBSER2
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Sostdc1", "Rasgrp1", "Gata3",
"Htr5b", "Sox1", "Pcsk5",
"Hs6st2", "Mbp", "Nos1"),
cols.use = c("gray", "red"))
# R1-DR
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("En1", "En2", "Satb2",
"Npy2r", "Pax5", "Rspo3",
"Foxa1", "Pcsk6", "Prph"),
cols.use = c("gray", "red"))
# R1-DR, dorsal/lateral vs. ventral/midline
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Asb4", "Gad1", "Prkcq",
"Trh", "Hcrtr1", "Pdyn",
"Rprm", "Slc17a8", "Cbln2"),
cols.use = c("gray", "red"))
# R1-MR
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Sox1", "Rorb", "Chrm2",
"Trpc5", "Esr2", "Epha4",
"Ppp1r17", "Pkd2l1", "Col19a1"),
cols.use = c("gray", "red"))
All cells have the same value of Trpc5.
# R2
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Rorb", "Pcdh19", "Nr2f2",
"Tcf4", "Foxp1", "Met",
"Kit", "Sema5a", "Cntn3"),
cols.use = c("gray", "red"))
#R5
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Chodl", "Shox2", "Meis2",
"Tac1", "Nkx6-1", "Grm4",
"Pnoc", "Perp", "Islr2"),
cols.use = c("gray", "red"))
# R6P
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Gpr101", "Sst", "Penk",
"Trhr", "Reln", "Ndnf",
"Mc4r", "Pou3f2", "Trh"),
cols.use = c("gray", "red"))
There appear to be additional subsets within clusters 2 and 3. We will explore this later, and go with the larger grouping for now and assign cluster identities by rhombomeres.
nodes.oobe <- AssessNodes(zeisel.ob.SER, genes.training = zeisel.ob.SER@var.genes)
| | 0 % ~calculating
|++++++++++ | 20% ~08s
|++++++++++++++++++++ | 40% ~05s
|++++++++++++++++++++++++++++++ | 60% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 08s
nodes.oobe <- nodes.oobe[order(nodes.oobe$oobe, decreasing = TRUE),]
nodes.oobe
Find genes enriched in each cluster
markers.cluster <- FindAllMarkers(object = zeisel.ob.SER,
test.use = "MAST",
only.pos = TRUE)
...........................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
...........................................................
Done!
............................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
............................
Done!
...............................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
...............................
Done!
...............
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
...............
Done!
.......................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
.......................................................
Done!
.............................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
.............................................
Done!
markers.node <- FindAllMarkersNode(object = zeisel.ob.SER,
test.use = "MAST",
only.pos = FALSE)
......................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
......................................................
Done!
................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
................................................
Done!
......................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
......................
Done!
...........................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
...........................
Done!
....................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
....................
Done!
head(markers.cluster[markers.cluster$cluster==1,], 50)
head(markers.cluster[markers.cluster$cluster==2,], 50)
head(markers.cluster[markers.cluster$cluster==3,], 50)
head(markers.cluster[markers.cluster$cluster==4,], 50)
head(markers.cluster[markers.cluster$cluster==5,], 50)
head(markers.cluster[markers.cluster$cluster==6,], 50)
head(markers.node[markers.node$cluster==7,], 50)
head(markers.node[markers.node$cluster==8,], 50)
head(markers.node[markers.node$cluster==9,], 50)
head(markers.node[markers.node$cluster==10,], 50)
head(markers.node[markers.node$cluster==11,], 50)
Could not find any cells/clusters corresponding to R3 in this dataset.
Cluster 2 looks very similar to clusters 5 and 6. Compare cluster 2 vs. 5 & 6:
markers.compare <- FindMarkers(object = zeisel.ob.SER,
ident.1 = 2,
ident.2 = c(5, 6),
test.use = "MAST",
only.pos = FALSE)
................................................................
Done!
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
................................................................
Done!
head(markers.compare, 50)
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Eef1a1", "Rpl41", "Rps19",
"Rpl23a", "Rpl37a", "Rps28",
"Tmsb10", "mt-Co1", "Rps29"),
cols.use = c("gray", "red"))
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Cldn11", "Ptges3", "Elmod1",
"Gprasp2", "St13", "Nptn",
"Gmps", "Xist", "Klhl9"),
cols.use = c("gray", "red"))
Check other R1-MR vs. R1-DR genes:
FeaturePlot(object = zeisel.ob.SER,
reduction.use = "umap",
features.plot = c("Npy1r", "Sox14", "Epha4",
"Col14a1", "Robo1", "Ret",
"Sema3e", "Cxcl14", "Klhl14"),
cols.use = c("gray", "red"))
VlnPlot(object = zeisel.ob.SER,
features.plot = c("nUMI",
"nGene",
"percent.mito",
"Xist",
"Tph2"),
nCol = 1,
point.size.use = 0,
size.x.use = 12,
size.y.use = 5,
x.lab.rot = FALSE)
Clusters 5 and 6 appear to be similar to cluster 2, with the separation being driven by differences that are likely to be technical or batch effects. However, there is insufficient metadata to examine this or to try and regress out these effects. It is possible that the HBSER1 cluster is separated from the other 5-HT neuron clusters in the Zeisel et al. (2018) dataset due to the same technical artifacts.
Cluster 2 appears to be a mixture of various R1 neurons (mostly R1-DR), while cluster 4 contains several MRN 5-HT neuron types that include both R1-MR and R2 neurons. It is unclear if there are any R3 neurons in this cluster, since we cannot detect cells expressing R3-enriched genes.
table(zeisel.ob.SER@ident)
1 2 3 4 5 6
20 57 103 76 68 50
zeisel.ob.SER <- StashIdent(object = zeisel.ob.SER,
save.name = "clusterNumbers_filtered")
ids.current <- levels(zeisel.ob.SER@ident)
ids.rhombomere <- c("R6P (ROb/RPa)", #1
"R1 (DRN)", #2 -- probably R1-DR
"R5 (RMg)", #3
"R1/R2 (MRN)", #4 -- at least R1-MR and R2
"R1 (lateral DRN)", #5
"R1 (medial DRN)") #6
zeisel.ob.SER@ident <- plyr::mapvalues(x = zeisel.ob.SER@ident,
from = ids.current,
to = ids.rhombomere)
zeisel.ob.SER <- StashIdent(object = zeisel.ob.SER,
save.name = "clusterNames_byRhombomere")
p1 <- DimPlot(object = zeisel.ob.SER,
reduction.use = "umap",
no.legend = TRUE,
do.label = TRUE,
pt.size = 3,
label.size = 6,
do.return = TRUE)
p2 <- DimPlot(object = zeisel.ob.SER,
reduction.use = "umap",
no.legend = TRUE,
do.label = TRUE,
pt.size = 3,
label.size = 6,
group.by = "zeisel.clusterNames",
do.return = TRUE)
plot_grid(p1, p2)
table(zeisel.ob.SER@ident)
R6P (ROb/RPa) R1 (DRN) R5 (RMg) R1/R2 (MRN) R1 (lateral DRN) R1 (medial DRN)
20 57 103 76 68 50
saveRDS(object = zeisel.ob.SER,
file = "/Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/zeisel_Seurat_HBSER_clustered_filtered.rds")
MitoRiboRatiolibrary(loomR)
Loading required package: R6
Loading required package: hdf5r
Attaching package: ‘loomR’
The following object is masked from ‘package:dplyr’:
combine
The following object is masked from ‘package:devtools’:
create
lfile <- connect(filename = "/Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/l6_r3_cholinergic_monoaminergic_and_peptidergic_neurons.loom")
lfile
Class: loom
Filename: /Volumes/LaCie/Dropbox/Sabatini Lab/DRN Cell Types Project/DRN Cell Types Manuscript/Revisions (1)/RNA-seq/l6_r3_cholinergic_monoaminergic_and_peptidergic_neurons.loom
Access type: H5F_ACC_RDONLY
Attributes: CreationDate, last_modified
Listing:
name obj_type dataset.dims dataset.type_class
col_attrs H5I_GROUP <NA> <NA>
col_graphs H5I_GROUP <NA> <NA>
layers H5I_GROUP <NA> <NA>
matrix H5I_DATASET 3977 x 27998 H5T_FLOAT
row_attrs H5I_GROUP <NA> <NA>
row_graphs H5I_GROUP <NA> <NA>
hist(log2(lfile$col.attrs$MitoRiboRatio[]+1), breaks = 100)
Looks similar to the distribution of percent.mito. Might be worth adding this in and trying to include it as a latent variable to see if we can remove the effects driving the separation of cluster 2 from 5 and 6.
lfile$close_all()
The dataset from Zeisel et al. (2018) might not serve as a sutiable “reference” dataset for comparing with our inDrop scRNA-seq dataset for the following reasons:
5-HT neurons in the Zeisel dataset come from multiple serotonergic nuclei, not only the dorsal raphe.
The clustering of 5-HT neurons by Zeisel et al. does not match the 5-HT neuron subsets described in Okaty et al. (2015), and re-clustering on the 5-HT neurons alone shows that the HBSER1-5 clusters are likely to be a mixture of 5-HT neuron types when categorized by their developmental lineage (also explaining the unusual list of DE genes in the Zeisel et al. data portal).
There appear to be several sources of technical variation or batch effects that drive the separation of clusters that are most similar to R1-DR cells, but there is insufficient metadata to understand the sources of these artifacts.
There are only 118 DRN 5-HT neurons (R1-DR) remaining if we exclude cells that are putative doublets or clustered by technical artifacts, which is a much smaller sample size than our inDrop dataset.
The data from Okaty et al. (2015) will make for a better “reference”, although most of the data was obtained using bulk RNA-seq of the sorted/hand-picked cells, with a much smaller single-cell dataset.
However, the results of clustering the 5-HT neurons in the Zeisel et al. (2018) dataset already show some agreement with the findings we have made from our scRNA-seq data – the R1-DR neurons in the Zeisel et al. (2018) dataset also separate into 2 distinct groups that each have additional heterogeneity within them, which appears to be similar to the separation that we see between the 5-HT-I/II and 5-HT-III/IV groups.
Few cells in this dataset were found to be Met+, and appear in both the R1 (medial DRN) cluster and the MRN (R1/R2) cluster. These cells are probably similar to the 5-HT-V subtype, although it is difficult to determine if any of these Met+ cells are actually in the DRN baed on the transcriptomic data alone.
Machine specifications:
devtools::session_info()
Session info -----------------------------------------------------------------------------------------
setting value
version R version 3.4.4 (2018-03-15)
system x86_64, darwin15.6.0
ui RStudio (1.1.447)
language (EN)
collate en_US.UTF-8
tz America/New_York
date 2019-05-07
Packages ---------------------------------------------------------------------------------------------
package * version date source
abind 1.4-5 2016-07-21 CRAN (R 3.4.0)
acepack 1.4.1 2016-10-29 cran (@1.4.1)
ape * 5.1 2018-04-04 CRAN (R 3.4.4)
assertthat 0.2.0 2017-04-11 CRAN (R 3.4.0)
backports 1.1.2 2017-12-13 CRAN (R 3.4.3)
base * 3.4.4 2018-03-15 local
base64enc 0.1-3 2015-07-28 cran (@0.1-3)
bindr 0.1.1 2018-03-13 CRAN (R 3.4.4)
bindrcpp * 0.2.2 2018-03-29 CRAN (R 3.4.4)
Biobase 2.38.0 2017-10-31 Bioconductor
BiocGenerics 0.24.0 2017-10-31 Bioconductor
bit 1.1-14 2018-05-29 CRAN (R 3.4.4)
bit64 0.9-7 2017-05-08 CRAN (R 3.4.0)
bitops 1.0-6 2013-08-17 cran (@1.0-6)
broom 0.4.4 2018-03-29 CRAN (R 3.4.4)
caret 6.0-80 2018-05-26 CRAN (R 3.4.4)
caTools 1.17.1 2014-09-10 cran (@1.17.1)
checkmate 1.8.5 2017-10-24 CRAN (R 3.4.2)
class 7.3-14 2015-08-30 CRAN (R 3.4.4)
cluster 2.0.7-1 2018-04-09 CRAN (R 3.4.4)
codetools 0.2-15 2016-10-05 CRAN (R 3.4.4)
colorspace 1.3-2 2016-12-14 CRAN (R 3.4.0)
compiler 3.4.4 2018-03-15 local
cowplot * 0.9.2 2017-12-17 CRAN (R 3.4.3)
CVST 0.2-2 2018-05-26 CRAN (R 3.4.4)
data.table 1.10.4-3 2017-10-27 CRAN (R 3.4.2)
datasets * 3.4.4 2018-03-15 local
ddalpha 1.3.2 2018-04-08 CRAN (R 3.4.4)
DelayedArray 0.4.1 2017-11-07 Bioconductor
DEoptimR 1.0-8 2016-11-19 cran (@1.0-8)
devtools * 1.13.5 2018-02-18 CRAN (R 3.4.3)
diffusionMap 1.1-0 2014-02-20 cran (@1.1-0)
digest 0.6.15 2018-01-28 CRAN (R 3.4.3)
dimRed 0.1.0 2017-05-04 CRAN (R 3.4.0)
diptest 0.75-7 2016-12-05 cran (@0.75-7)
doSNOW 1.0.16 2017-12-13 CRAN (R 3.4.3)
dplyr * 0.7.5 2018-05-19 CRAN (R 3.4.4)
DRR 0.0.3 2018-01-06 CRAN (R 3.4.3)
dtw 1.20-1 2018-05-18 CRAN (R 3.4.4)
fitdistrplus 1.0-9 2017-03-24 CRAN (R 3.4.0)
flexmix 2.3-14 2017-04-28 cran (@2.3-14)
FNN 1.1 2013-07-31 cran (@1.1)
foreach 1.4.4 2017-12-12 CRAN (R 3.4.3)
foreign 0.8-70 2018-04-23 CRAN (R 3.4.4)
Formula 1.2-3 2018-05-03 CRAN (R 3.4.4)
fpc 2.1-11 2018-01-13 CRAN (R 3.4.3)
gdata 2.18.0 2017-06-06 cran (@2.18.0)
GenomeInfoDb 1.14.0 2017-10-31 Bioconductor
GenomeInfoDbData 1.0.0 2018-05-30 Bioconductor
GenomicRanges 1.30.3 2018-02-26 Bioconductor
geometry 0.3-6 2015-09-09 CRAN (R 3.4.0)
ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.4.0)
ggridges 0.5.0 2018-04-05 CRAN (R 3.4.4)
glue 1.3.1 2019-03-12 cran (@1.3.1)
gower 0.1.2 2017-02-23 CRAN (R 3.4.0)
gplots 3.0.1 2016-03-30 cran (@3.0.1)
graphics * 3.4.4 2018-03-15 local
grDevices * 3.4.4 2018-03-15 local
grid 3.4.4 2018-03-15 local
gridExtra 2.3 2017-09-09 CRAN (R 3.4.1)
gtable 0.2.0 2016-02-26 CRAN (R 3.4.0)
gtools 3.5.0 2015-05-29 cran (@3.5.0)
hdf5r * 1.2.0 2019-04-20 CRAN (R 3.4.4)
Hmisc 4.1-1 2018-01-03 CRAN (R 3.4.3)
htmlTable 1.12 2018-05-26 CRAN (R 3.4.4)
htmltools 0.3.6 2017-04-28 cran (@0.3.6)
htmlwidgets 1.2 2018-04-19 CRAN (R 3.4.4)
ica 1.0-2 2018-05-24 CRAN (R 3.4.4)
igraph 1.2.1 2018-03-10 CRAN (R 3.4.4)
ipred 0.9-6 2017-03-01 CRAN (R 3.4.0)
IRanges 2.12.0 2017-10-31 Bioconductor
irlba 2.3.2 2018-01-11 CRAN (R 3.4.3)
iterators 1.0.9 2017-12-12 CRAN (R 3.4.3)
jsonlite 1.5 2017-06-01 CRAN (R 3.4.0)
kernlab 0.9-26 2018-04-30 CRAN (R 3.4.4)
KernSmooth 2.23-15 2015-06-29 CRAN (R 3.4.4)
knitr 1.20 2018-02-20 CRAN (R 3.4.3)
labeling 0.3 2014-08-23 CRAN (R 3.4.0)
lars 1.2 2013-04-24 cran (@1.2)
lattice 0.20-35 2017-03-25 CRAN (R 3.4.4)
latticeExtra 0.6-28 2016-02-09 cran (@0.6-28)
lava 1.6.1 2018-03-28 CRAN (R 3.4.4)
lazyeval 0.2.1 2017-10-29 CRAN (R 3.4.2)
lmtest 0.9-36 2018-04-04 CRAN (R 3.4.4)
loomR * 0.2.1.9000 2019-04-26 Github (mojaveazure/loomR@87a6866)
lubridate 1.7.4 2018-04-11 CRAN (R 3.4.4)
magic 1.5-8 2018-01-26 CRAN (R 3.4.3)
magrittr 1.5 2014-11-22 CRAN (R 3.4.0)
MASS 7.3-50 2018-04-30 CRAN (R 3.4.4)
MAST 1.4.1 2017-12-23 Bioconductor
Matrix * 1.2-14 2018-04-09 CRAN (R 3.4.4)
matrixStats 0.53.1 2018-02-11 CRAN (R 3.4.3)
mclust 5.4 2017-11-22 CRAN (R 3.4.3)
memoise 1.1.0 2017-04-21 CRAN (R 3.4.0)
metap 0.9 2018-04-25 CRAN (R 3.4.4)
methods * 3.4.4 2018-03-15 local
mixtools 1.1.0 2017-03-10 cran (@1.1.0)
mnormt 1.5-5 2016-10-15 cran (@1.5-5)
ModelMetrics 1.1.0 2016-08-26 cran (@1.1.0)
modeltools 0.2-21 2013-09-02 cran (@0.2-21)
munsell 0.4.3 2016-02-13 CRAN (R 3.4.0)
mvtnorm 1.0-7 2018-01-25 CRAN (R 3.4.3)
nlme 3.1-137 2018-04-07 CRAN (R 3.4.4)
nnet 7.3-12 2016-02-02 CRAN (R 3.4.4)
parallel * 3.4.4 2018-03-15 local
pbapply 1.4-0 2019-02-05 cran (@1.4-0)
pillar 1.2.3 2018-05-25 CRAN (R 3.4.4)
pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.0)
plyr 1.8.4 2016-06-08 CRAN (R 3.4.0)
png 0.1-7 2013-12-03 CRAN (R 3.4.0)
prabclus 2.2-6 2015-01-14 cran (@2.2-6)
prodlim 2018.04.18 2018-04-18 CRAN (R 3.4.4)
proxy 0.4-22 2018-04-08 CRAN (R 3.4.4)
psych 1.8.4 2018-05-06 CRAN (R 3.4.4)
purrr 0.2.5 2018-05-29 CRAN (R 3.4.4)
R.methodsS3 1.7.1 2016-02-16 cran (@1.7.1)
R.oo 1.22.0 2018-04-22 CRAN (R 3.4.4)
R.utils 2.6.0 2017-11-05 CRAN (R 3.4.2)
R6 * 2.4.0 2019-02-14 CRAN (R 3.4.4)
ranger 0.10.0 2018-05-29 CRAN (R 3.4.4)
RANN 2.5.1 2017-05-21 CRAN (R 3.4.0)
RColorBrewer 1.1-2 2014-12-07 CRAN (R 3.4.0)
Rcpp 0.12.17 2018-05-18 CRAN (R 3.4.4)
RcppRoll 0.2.2 2015-04-05 CRAN (R 3.4.0)
RCurl 1.95-4.10 2018-01-04 CRAN (R 3.4.3)
recipes 0.1.2 2018-01-11 CRAN (R 3.4.3)
reshape2 1.4.3 2017-12-11 CRAN (R 3.4.3)
reticulate * 1.11.1 2019-03-06 CRAN (R 3.4.4)
rlang 0.2.0 2018-02-20 CRAN (R 3.4.3)
robustbase 0.92-8 2017-11-01 CRAN (R 3.4.2)
ROCR 1.0-7 2015-03-26 cran (@1.0-7)
rpart 4.1-13 2018-02-23 CRAN (R 3.4.4)
rstudioapi 0.7 2017-09-07 CRAN (R 3.4.1)
Rtsne 0.13 2017-04-14 cran (@0.13)
S4Vectors 0.16.0 2017-10-31 Bioconductor
scales 0.5.0 2017-08-24 CRAN (R 3.4.1)
scatterplot3d 0.3-41 2018-03-14 CRAN (R 3.4.4)
SDMTools 1.1-221 2014-08-05 cran (@1.1-221)
segmented 0.5-3.0 2017-11-30 CRAN (R 3.4.3)
Seurat * 2.3.1 2018-05-05 CRAN (R 3.4.4)
sfsmisc 1.1-2 2018-03-05 CRAN (R 3.4.4)
snow 0.4-2 2016-10-14 CRAN (R 3.4.0)
splines 3.4.4 2018-03-15 local
stats * 3.4.4 2018-03-15 local
stats4 3.4.4 2018-03-15 local
stringi 1.4.3 2019-03-12 cran (@1.4.3)
stringr * 1.4.0 2019-02-10 cran (@1.4.0)
SummarizedExperiment 1.8.1 2017-12-19 Bioconductor
survival 2.42-3 2018-04-16 CRAN (R 3.4.4)
tclust 1.4-1 2018-05-24 CRAN (R 3.4.4)
tibble 1.4.2 2018-01-22 CRAN (R 3.4.3)
tidyr 0.8.1 2018-05-18 CRAN (R 3.4.4)
tidyselect 0.2.4 2018-02-26 CRAN (R 3.4.3)
timeDate 3043.102 2018-02-21 CRAN (R 3.4.3)
tools 3.4.4 2018-03-15 local
trimcluster 0.1-2 2012-10-29 cran (@0.1-2)
tsne 0.1-3 2016-07-15 cran (@0.1-3)
useful * 1.2.4 2018-05-21 CRAN (R 3.4.4)
utils * 3.4.4 2018-03-15 local
VGAM 1.0-5 2018-02-07 CRAN (R 3.4.3)
withr 2.1.2 2018-03-15 CRAN (R 3.4.4)
XVector 0.18.0 2017-10-31 Bioconductor
yaml 2.1.19 2018-05-01 CRAN (R 3.4.4)
zlibbioc 1.24.0 2017-10-31 Bioconductor
zoo 1.8-1 2018-01-08 CRAN (R 3.4.3)